import re

# Saved copy of the page to parse, relative to the working directory.
HTML_PATH = "../素材/豆瓣.html"

# Captures the body of every <p>...</p> element. re.S lets "." cross line
# breaks so multi-line paragraphs match; re.I also accepts upper-case tags.
_PARAGRAPH_RE = re.compile(r"<p>(.*?)</p>", re.S | re.I)


def extract_paragraphs(html):
    """Return the stripped text of every ``<p>`` element except the first.

    Args:
        html: Markup to search, as a single string.

    Returns:
        A list of paragraph bodies with surrounding whitespace removed, in
        document order. The first match is always dropped; the result is
        empty when the markup has fewer than two ``<p>`` elements.
    """
    paragraphs = [body.strip() for body in _PARAGRAPH_RE.findall(html)]
    # NOTE(review): the first paragraph is discarded unconditionally --
    # presumably it is not part of the wanted content; confirm against the
    # page. Slicing (unlike pop(0)) does not raise when nothing matched.
    return paragraphs[1:]


def main():
    """Read the saved page and print each extracted paragraph on its own line."""
    # The context manager closes the file even if reading fails.
    with open(HTML_PATH, "r", encoding="utf-8") as html_file:
        html = html_file.read()

    paragraphs = extract_paragraphs(html)

    # TODO: write the scraped content to a database (not implemented yet).

    for paragraph in paragraphs:
        print(paragraph)


if __name__ == "__main__":
    main()